Correlation matrix

ann_colors=list(c("red", "green","blue")[komponenty])
ann=ann_colors[[1]]
kor=cor(all_election_2, use="pairwise.complete.obs")
pheatmap(kor, display_numbers = T, annotation_colors = ann)

PCA

pca <- prcomp(all_election_bezbledow, center = TRUE, scale. = TRUE)
varimax4 <- varimax(pca$rotation[,1:3])
varimax4$loadings
## 
## Loadings:
##                      PC1    PC2    PC3   
## population_size             -0.221       
## forest_density               0.248 -0.140
## longitude                           0.468
## PIS_support                         0.546
## income                      -0.144 -0.162
## postproduction_age    0.251              
## internal_emigration   0.302              
## external_emigration         -0.167 -0.142
## internal_immigration  0.301              
## external_immigration  0.301              
## industry_revenue      0.285              
## empl_agriculture                    0.526
## empl_industry         0.233 -0.113       
## empl_services         0.315              
## poulation_density           -0.263 -0.147
## arrival_SARS                 0.270 -0.123
## size_COVID            0.130 -0.179       
## mob_in                0.281              
## mob_out                     -0.348       
## mobility              0.248 -0.114       
## log_mobility                -0.382       
## pagerank_mob          0.300              
## betweenness_mob       0.146 -0.150       
## closeness_mob               -0.252       
## senior_fraction       0.211  0.155       
## industry_fraction     0.252              
## neighbors_size       -0.115 -0.378       
## neighbors_arr               -0.186       
## PM                          -0.274  0.259
## 
##                  PC1   PC2   PC3
## SS loadings    1.000 1.000 1.000
## Proportion Var 0.034 0.034 0.034
## Cumulative Var 0.034 0.069 0.103
all_pca=summary(pca)
all_pca
## Importance of components:
##                           PC1     PC2     PC3     PC4     PC5     PC6
## Standard deviation     3.5960 1.66980 1.50897 1.20151 1.07072 1.01375
## Proportion of Variance 0.4459 0.09615 0.07852 0.04978 0.03953 0.03544
## Cumulative Proportion  0.4459 0.54206 0.62058 0.67036 0.70989 0.74533
##                            PC7    PC8     PC9    PC10    PC11    PC12
## Standard deviation     0.96707 0.9633 0.84831 0.82924 0.81837 0.79415
## Proportion of Variance 0.03225 0.0320 0.02481 0.02371 0.02309 0.02175
## Cumulative Proportion  0.77758 0.8096 0.83439 0.85810 0.88119 0.90294
##                           PC13    PC14    PC15    PC16    PC17    PC18
## Standard deviation     0.69472 0.65449 0.63049 0.56435 0.53630 0.44488
## Proportion of Variance 0.01664 0.01477 0.01371 0.01098 0.00992 0.00682
## Cumulative Proportion  0.91959 0.93436 0.94806 0.95905 0.96896 0.97579
##                           PC19    PC20    PC21    PC22    PC23    PC24
## Standard deviation     0.43328 0.38259 0.32178 0.27864 0.27123 0.25460
## Proportion of Variance 0.00647 0.00505 0.00357 0.00268 0.00254 0.00224
## Cumulative Proportion  0.98226 0.98731 0.99088 0.99356 0.99609 0.99833
##                           PC25    PC26    PC27    PC28      PC29
## Standard deviation     0.14915 0.12292 0.08283 0.06516 5.702e-16
## Proportion of Variance 0.00077 0.00052 0.00024 0.00015 0.000e+00
## Cumulative Proportion  0.99910 0.99962 0.99985 1.00000 1.000e+00

Full reggression model

#spa vs no
arr=all_election_2[,-c(20)]
arr2=all_election_bezbledow[,-c(20,25,27,28,17)]
arr2_=all_election_2[,-c(20,25,27,28,17)]

#spatial
#arr$neighbors_arr=all_election$neighbors_arr

size=all_election_2[,-c(20)]
size2=all_election_bezbledow[,-c(20,25,27,28,16)]
size2_=all_election_2[,-c(20,25,27,28,16)]


#spatial
#size$PM=as.numeric(paste(size$PM))
mod_size=lm(size_COVID~., data=size)
mod_size2=lm(size_COVID~., data=size2)
mod_size2_=lm(size_COVID~., data=size2_)

af <- anova(mod_size)
afss <- af$"Sum Sq"
anova(mod_size)
proc<-cbind(af,PctExp=afss/sum(afss)*100)
et=colnames(size)[-17]
#et=colnames(size)[-16]
#et[24]="Residuals"
proc=proc[-28,]
#proc=proc[-24,]
komponenty1=komponenty[-c(17,20)]

pdf("size_full_.pdf")
plot(proc$PctExp, xaxt = "n", main="%explained variance: No. cases (full  model)", xlab="", ylab = "%",col = c("red", "green","blue")[komponenty1], pch = c(16, 17, 18)[komponenty1])
axis(1, at=1:27, labels=FALSE)
text(seq(1, 27, by=1), par("usr")[3]-0.6, par("usr")[2]+0, 0, cex=0.57, labels = et, srt = 90, pos = 2, xpd = TRUE)
dev.off()
## png 
##   2
mod_arr=lm(arrival_SARS~., data=arr)
mod_arr2=lm(arrival_SARS~., data=arr2)
mod_arr2_=lm(arrival_SARS~., data=arr2_)

af <- anova(mod_arr)
afss <- af$"Sum Sq"
proc<-cbind(af,PctExp=afss/sum(afss)*100)
et=colnames(arr)[-16]
komponenty1=komponenty[-c(16,20)]
#et[24]="Residuals"
#krotsze
#proc=proc[-24,]
proc=proc[-28,]
#plot(proc$PctExp, main="%wyjasnienia zmiennsci czasu_dojscoa w modelu liniowym")
pdf("arr_full_.pdf")
plot(proc$PctExp, xaxt = "n", ylim=c(0,14), main="%explained variance: arrival time (full  model)", xlab="", ylab="%",  col = c("red", "green","blue")[komponenty1], pch = c(16, 17, 18)[komponenty1])
axis(1, at=1:27, labels=FALSE)
text(seq(1, 27, by=1), par("usr")[3] -0.6, par("usr")[2]+0,0, cex=0.6, labels = et, srt = 90, pos = 2, xpd = TRUE)
#legend(1,5, legend=c("a"))
legend(1, 14, legend=c("demographic cluster", "covid-like cluster", "social cluster"), col = c("red", "green","blue"), pch = c(16, 17, 18), cex=0.8)
dev.off()
## png 
##   2
#mod_arr=lm(arrival_SARS~., data=arr)
#summary(mod_arr)
step.model <- stepAIC(mod_arr, direction = "both", trace = FALSE)
step.model2 <- stepAIC(mod_arr2, direction = "both", trace = FALSE)
step.model2_ <- stepAIC(mod_arr2_, direction = "both", trace = FALSE)


#mod_size=lm(size_COVID~., data=size)
step.model_s <- stepAIC(mod_size, direction = "both", trace = FALSE)
step.model_s2 <- stepAIC(mod_size2, direction = "both", trace = FALSE)
step.model_s2_ <- stepAIC(mod_size2_, direction = "both", trace = FALSE)

mod=tab_model(step.model, step.model_s, digits = 2)
mod
  arrival_SARS size_COVID
Predictors Estimates CI p Estimates CI p
(Intercept) 97.43 72.48 – 122.37 <0.001 -57.32 -97.05 – -17.60 0.005
forest_density 7.83 -2.70 – 18.36 0.144
longitude 0.56 -0.21 – 1.32 0.151
PIS_support -0.18 -0.37 – 0.00 0.052
postproduction_age -0.00 -0.00 – 0.00 0.095
internal_emigration 0.00 -0.00 – 0.01 0.082
external_emigration -0.01 -0.02 – 0.00 0.123 0.11 0.05 – 0.17 <0.001
internal_immigration -0.00 -0.00 – -0.00 0.039
empl_agriculture -0.00 -0.00 – 0.00 0.108
size_COVID -0.01 -0.03 – 0.00 0.115
mob_in 0.00 -0.00 – 0.00 0.061
mob_out 0.00 0.00 – 0.00 0.027 0.00 0.00 – 0.01 0.003
log_mobility -8.57 -10.99 – -6.15 <0.001
industry_revenue -0.00 -0.01 – -0.00 0.030
empl_services 0.00 0.00 – 0.00 <0.001
poulation_density -0.02 -0.03 – -0.00 0.028
arrival_SARS -0.44 -0.94 – 0.06 0.081
pagerank_mob -2439.35 -5597.98 – 719.28 0.130
neighbors_size 0.00 0.00 – 0.00 <0.001
PM 1.94 0.92 – 2.95 <0.001
industry_fraction 0.00 -0.00 – 0.00 0.064
Observations 380 380
R2 / R2 adjusted 0.284 / 0.260 0.529 / 0.516
mod2=tab_model(step.model2, step.model_s2, digits = 4, show.ci = FALSE)
mod2
  arrival_SARS size_COVID
Predictors Estimates p Estimates p
(Intercept) 103.4721 <0.001 -99.0446 <0.001
longitude 0.6502 0.096
PIS_support -0.2037 0.033
internal_emigration 0.0030 0.049
external_emigration -0.0123 0.044 0.1402 <0.001
internal_immigration -0.0021 0.085
empl_agriculture -0.0004 0.032
mob_out 0.0004 0.119 0.0042 <0.001
log_mobility -7.6686 <0.001
closeness_mob -258546.5005 0.088
industry_revenue -0.0030 0.018
empl_services 0.0012 <0.001
industry_fraction 0.0001 0.023
PM 2.7087 <0.001
Observations 370 370
R2 / R2 adjusted 0.273 / 0.255 0.514 / 0.505
mod_all=tab_model(step.model,step.model2, step.model_s,step.model_s2, digits = 4, show.ci = FALSE,  show.aic=TRUE)
mod_all
  arrival_SARS arrival_SARS size_COVID size_COVID
Predictors Estimates p Estimates p Estimates p Estimates p
(Intercept) 97.4251 <0.001 103.4721 <0.001 -57.3222 0.005 -99.0446 <0.001
forest_density 7.8301 0.144
longitude 0.5588 0.151 0.6502 0.096
PIS_support -0.1842 0.052 -0.2037 0.033
postproduction_age -0.0001 0.095
internal_emigration 0.0028 0.082 0.0030 0.049
external_emigration -0.0096 0.123 -0.0123 0.044 0.1110 <0.001 0.1402 <0.001
internal_immigration -0.0025 0.039 -0.0021 0.085
empl_agriculture -0.0003 0.108 -0.0004 0.032
size_COVID -0.0145 0.115
mob_in 0.0002 0.061
mob_out 0.0006 0.027 0.0004 0.119 0.0032 0.003 0.0042 <0.001
log_mobility -8.5689 <0.001 -7.6686 <0.001
closeness_mob -258546.5005 0.088
industry_revenue -0.0028 0.030 -0.0030 0.018
empl_services 0.0018 <0.001 0.0012 <0.001
poulation_density -0.0179 0.028
arrival_SARS -0.4407 0.081
pagerank_mob -2439.3479 0.130
neighbors_size 0.0006 <0.001
PM 1.9359 <0.001 2.7087 <0.001
industry_fraction 0.0001 0.064 0.0001 0.023
Observations 380 370 380 370
R2 / R2 adjusted 0.284 / 0.260 0.273 / 0.255 0.529 / 0.516 0.514 / 0.505
AIC 3060.042 2986.258 4320.816 4209.976
mod_all=tab_model(step.model,step.model2, step.model_s,step.model_s2, digits = 3, show.ci = FALSE,  show.aic=TRUE)
mod_all
  arrival_SARS arrival_SARS size_COVID size_COVID
Predictors Estimates p Estimates p Estimates p Estimates p
(Intercept) 97.425 <0.001 103.472 <0.001 -57.322 0.005 -99.045 <0.001
forest_density 7.830 0.144
longitude 0.559 0.151 0.650 0.096
PIS_support -0.184 0.052 -0.204 0.033
postproduction_age -0.000 0.095
internal_emigration 0.003 0.082 0.003 0.049
external_emigration -0.010 0.123 -0.012 0.044 0.111 <0.001 0.140 <0.001
internal_immigration -0.002 0.039 -0.002 0.085
empl_agriculture -0.000 0.108 -0.000 0.032
size_COVID -0.014 0.115
mob_in 0.000 0.061
mob_out 0.001 0.027 0.000 0.119 0.003 0.003 0.004 <0.001
log_mobility -8.569 <0.001 -7.669 <0.001
closeness_mob -258546.501 0.088
industry_revenue -0.003 0.030 -0.003 0.018
empl_services 0.002 <0.001 0.001 <0.001
poulation_density -0.018 0.028
arrival_SARS -0.441 0.081
pagerank_mob -2439.348 0.130
neighbors_size 0.001 <0.001
PM 1.936 <0.001 2.709 <0.001
industry_fraction 0.000 0.064 0.000 0.023
Observations 380 370 380 370
R2 / R2 adjusted 0.284 / 0.260 0.273 / 0.255 0.529 / 0.516 0.514 / 0.505
AIC 3060.042 2986.258 4320.816 4209.976
p<-ggplot(all_election_2, aes(x="",y=arrival_SARS-1)) +theme_bw()+
  ylab("days since first case on 04.03") + xlab("density of No. poviats") +
  geom_violin(fill="green")
p

p<-ggplot(all_election_2, aes(x="",y=size_COVID)) +theme_bw()+
  ylab("No. cases")+xlab("density of No. poviats")+ 
  geom_violin(fill="red")
p

moran’s spatial correlations

library(ape)
## Warning: package 'ape' was built under R version 3.6.3
dists.inv <- 1/macierz
diag(dists.inv ) <- 0
Moran.I(all_election_2$arrival_SARS, dists.inv)
## $observed
## [1] 0.02380674
## 
## $expected
## [1] -0.002638522
## 
## $sd
## [1] 0.005191331
## 
## $p.value
## [1] 3.503653e-07
Moran.I(all_election_2$size_COVID, dists.inv)
## $observed
## [1] 0.08381928
## 
## $expected
## [1] -0.002638522
## 
## $sd
## [1] 0.005023173
## 
## $p.value
## [1] 0
all_election=all_election_2
all_election$pred_arr<-mod_arr$fitted.values
all_election$pred_size<-mod_size$fitted.values
all_election$res_size<-mod_size$residuals

#scaling to standard normal distribution
all_election$pred_arr_s<-scale(all_election$pred_arr)
all_election$pred_size_s<-scale(mod_size$fitted.values)
all_election$res_size_s<-scale(mod_size$residuals)
all_election$opt=NA
all_election$opt_bin=0

for (i in 1:380) {
all_election$opt[i]=as.numeric(all_election$res_size_s[i])+as.numeric(all_election$pred_size_s[i])-as.numeric(all_election$pred_arr_s[i])
}

wybrane=which(all_election$opt>0)
sum_all=sum(all_election$opt[wybrane])
all_election$allocation=NA
for (i in wybrane){
  all_election$allocation[i]=ceiling(all_election$opt[i]*1000/sum_all)
  all_election$opt_bin[i]=1
}

all_election2$allocation=all_election$allocation
library(rgdal)
## Warning: package 'rgdal' was built under R version 3.6.2
## Loading required package: sp
## rgdal: version: 1.4-8, (SVN revision 845)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/gulak/OneDrive/Documents/R/win-library/3.6/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/gulak/OneDrive/Documents/R/win-library/3.6/rgdal/proj
##  Linking to sp version: 1.3-2
require(rgeos) 
## Loading required package: rgeos
## Warning: package 'rgeos' was built under R version 3.6.2
## rgeos version: 0.5-2, (SVN revision 621)
##  GEOS runtime version: 3.6.1-CAPI-1.10.1 
##  Linking to sp version: 1.3-2 
##  Polygon checking: TRUE
poland_powiaty_eqa <- readOGR(dsn="C://Users/gulak/Downloads/Nowy folder/asf/ComputationalEpidemiologyASF-master/ProjektML_HEX/Maps", "powiaty")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\gulak\Downloads\Nowy folder\asf\ComputationalEpidemiologyASF-master\ProjektML_HEX\Maps", layer: "powiaty"
## with 380 features
## It has 29 fields
poland_powiaty <- readOGR(dsn="C://Users/gulak/Downloads/Nowy folder/asf/ComputationalEpidemiologyASF-master/ProjektML_HEX/Maps", "powiaty")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\gulak\Downloads\Nowy folder\asf\ComputationalEpidemiologyASF-master\ProjektML_HEX\Maps", layer: "powiaty"
## with 380 features
## It has 29 fields
poland_powiaty <- spTransform(poland_powiaty,
                              CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"))


poland_powiaty <- gSimplify(poland_powiaty,  tol = 5000, topologyPreserve = TRUE)
poland_powiaty_eqa@data$id=c(0:379)

data1 = fortify(poland_powiaty) %>% left_join(.,poland_powiaty_eqa@data %>% mutate(id = as.character(id)), by = "id" )
#data1$jpt_kod_je2=as.character(paste(data1$jpt_kod_je))

for (i in 1:380){
  if (all_election2$ter_map[i]<1000)
  {all_election2$ter_0[i]=paste("0",as.character(all_election2$ter_map[i]), colapse="", sep="")} else
  {all_election2$ter_0[i]=as.character(all_election2$ter_map[i])}
}
all_election2$jpt_kod_je=as.factor(all_election2$ter_0)


#all_election2$jpt_kod_je=as.character(paste(all_election2$c4))
dane= inner_join(all_election2, data1, by = c("jpt_kod_je"= "jpt_kod_je"))


#dag=simplify(dane)
pdf("alokacja_.pdf")
 ggplot() +
  theme_bw()+
  geom_polygon(data = dane, 
               aes(long, lat, group = group, fill = allocation),
               #fill = "grey50", 
               #alpha = 0,lwd=0.1,
               colour = "white") +
  # scale_color_manual(values = mycolors) +
  scale_color_distiller("alocation", palette = "Spectral")+
  ggtitle("Best allocaton of new 1000 Disease Intervention Specialists")
dev.off()
## png 
##   2

diagnostics

pdf("mod_arr_cos2_.pdf")
par(mfrow = c(2,2))
par(ask=F)
plot(step.model2_, bty='l', cex.main = 1, cex.lab=1.5, cex.axis=2, cex = 1)
dev.off()
## png 
##   2
pdf("mod_size_cos2_).pdf")
par(mfrow = c(2,2))
par(ask=F)
plot(step.model_s2_, bty='l', cex.main = 1, cex.lab=1.5, cex.axis=2, cex = 1)
dev.off()
## png 
##   2
ncvTest(step.model_s)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 188.2834, Df = 1, p = < 2.22e-16
# Q-Q plot
ggplot(step.model_s2_, aes(.fitted, .resid)) +
  geom_hline(yintercept = 0) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(step.model_s2_, aes(sample = .stdresid)) + stat_qq() + theme(text = element_text(size = 20)) + geom_abline() 

#Shapiro-Wilk's test: p < 0.05 indicates non-normality:
shapiro.test(resid(step.model_s2_))
## 
##  Shapiro-Wilk normality test
## 
## data:  resid(step.model_s2_)
## W = 0.7362, p-value < 2.2e-16
# collinearity
#row.names(arr)=all_election2$id.y
.rowNamesDF(arr2_, make.names=TRUE) <- all_election2$id.y
.rowNamesDF(size2_, make.names=TRUE) <- all_election2$id.y

vif(mod_arr2_)
##      population_size       forest_density            longitude 
##             2.425536             1.318123             2.110235 
##          PIS_support               income   postproduction_age 
##             3.782306             1.631305             3.333989 
##  internal_emigration  external_emigration internal_immigration 
##            45.307621             2.116985            45.625802 
## external_immigration     industry_revenue     empl_agriculture 
##            22.246440             9.489893             2.767571 
##        empl_industry        empl_services    poulation_density 
##            20.041885            91.972231             3.449822 
##               mob_in              mob_out         log_mobility 
##            26.713089             6.294810             6.205280 
##         pagerank_mob      betweenness_mob        closeness_mob 
##            31.840791             5.199595             1.667531 
##    industry_fraction                   PM 
##             5.704358             1.527870
vif(mod_size2_)
##      population_size       forest_density            longitude 
##             2.425536             1.318123             2.110235 
##          PIS_support               income   postproduction_age 
##             3.782306             1.631305             3.333989 
##  internal_emigration  external_emigration internal_immigration 
##            45.307621             2.116985            45.625802 
## external_immigration     industry_revenue     empl_agriculture 
##            22.246440             9.489893             2.767571 
##        empl_industry        empl_services    poulation_density 
##            20.041885            91.972231             3.449822 
##               mob_in              mob_out         log_mobility 
##            26.713089             6.294810             6.205280 
##         pagerank_mob      betweenness_mob        closeness_mob 
##            31.840791             5.199595             1.667531 
##    industry_fraction                   PM 
##             5.704358             1.527870
sqrt(vif(step.model_s2_)) > 2
##              income external_emigration    industry_revenue 
##               FALSE               FALSE                TRUE 
##       empl_services             mob_out     betweenness_mob 
##                TRUE               FALSE               FALSE 
##                  PM 
##               FALSE
# outlier test
outlierTest(step.model_s2_)
##     rstudent unadjusted p-value Bonferroni p
## 141 8.944814         1.7973e-17   6.8297e-15
## 226 5.992239         4.9017e-09   1.8626e-06
## 164 4.598794         5.8383e-06   2.2185e-03
## 224 4.275937         2.4238e-05   9.2104e-03
## 45  4.121054         4.6553e-05   1.7690e-02
pdf("influence_arr_.pdf")
influencePlot(step.model2_, id.method="identify", main="Influence Plot for arrival time",
              sub="Circle size is proportional to Cook's distance")
## Warning in plot.window(...): "id.method" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "id.method" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "id.method" is
## not a graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "id.method" is
## not a graphical parameter
## Warning in box(...): "id.method" is not a graphical parameter
## Warning in title(...): "id.method" is not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "id.method" is not a
## graphical parameter
dev.off()
## png 
##   2
pdf("influence_size2.pdf")
influencePlot(step.model_s2_, id.method="identify", main="Influence Plot for No. cases",
              sub="Circle size is proportional to Cook's distance")
## Warning in plot.window(...): "id.method" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "id.method" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "id.method" is
## not a graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "id.method" is
## not a graphical parameter
## Warning in box(...): "id.method" is not a graphical parameter
## Warning in title(...): "id.method" is not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "id.method" is not a
## graphical parameter
dev.off()
## png 
##   2
fig <- plot_ly(all_election, x = ~pred_arr, y = ~res_size, z = ~pred_size, color = ~opt_bin, colors = c('#BF382A', '#0C4B8E'))
fig <- fig %>% add_markers()
fig <- fig %>% layout(scene = list(xaxis = list(title = 'Predicted arriva time'),
                                   yaxis = list(title = 'Residuals No. cases'),
                                   zaxis = list(title = 'Predicted No. cases')))

fig